# -*- coding: utf-8 -*-
"""
Created on Tue Jul 30 11:21:33 2019

@author: rklotz
"""

def interp(df_ves, var_list, *, max_iter=40, n_estimators=50, random_state=0):
    """Fill missing values in selected columns by iterative imputation.

    Each column in ``var_list`` is imputed with scikit-learn's
    ``IterativeImputer`` using an ``ExtraTreesRegressor`` as the estimator.
    The predictors are the ``var_list`` columns themselves plus one-hot
    dummies (prefix ``d``) built from the ``group`` column of ``df_ves``.
    Features are imputed in ascending order of missingness, i.e. starting
    with the feature that has the fewest missing values.

    Parameters
    ----------
    df_ves : pandas.DataFrame
        Input table. Must contain a ``group`` column and every column named
        in ``var_list``. It is not modified.
    var_list : sequence of str
        Names of the columns to impute, e.g.
        ``['beam', 'loa', 'draft', 'built', 'power_kw']``.
    max_iter : int, optional
        Maximum number of imputation rounds passed to ``IterativeImputer``.
    n_estimators : int, optional
        Number of trees in the ``ExtraTreesRegressor``.
    random_state : int or None, optional
        Seed passed to the ``ExtraTreesRegressor``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index as ``df_ves`` in which, for every
        ``col`` in ``var_list``:

        * ``col + "_orig"`` holds the original (possibly missing) values,
        * ``col`` holds the imputed values (appended after the original
          columns, in ``var_list`` order),
        * ``col + "_interp"`` is 1 where the original value was missing and
          0 otherwise (appended last, in ``var_list`` order).

    Raises
    ------
    ValueError
        If any column in ``var_list`` has no observed value at all; the
        imputer discards such features, so they cannot be filled.
    """
    import pandas as pd

    cols = list(var_list)
    if not cols:
        # Nothing to impute: skip the (expensive) model fit entirely and
        # hand back a copy so the caller's frame is never aliased.
        return df_ves.copy()

    features = df_ves.loc[:, cols]
    is_missing = features.isnull()

    # IterativeImputer drops features that are entirely missing, which would
    # otherwise surface later as an opaque shape-mismatch error.
    all_missing = is_missing.all()
    unfillable = [str(name) for name in all_missing.index[all_missing.to_numpy()]]
    if unfillable:
        raise ValueError(
            "cannot impute columns with no observed values: "
            + ", ".join(unfillable)
        )

    # Imported lazily (and after validation) so that trivial calls do not
    # pay for, or depend on, scikit-learn. The first import only enables the
    # experimental IterativeImputer API and must precede the second one.
    from sklearn.experimental import enable_iterative_imputer  # noqa: F401
    from sklearn.impute import IterativeImputer
    from sklearn.ensemble import ExtraTreesRegressor

    imputer = IterativeImputer(
        max_iter=max_iter,
        estimator=ExtraTreesRegressor(
            n_estimators=n_estimators, random_state=random_state
        ),
        imputation_order="ascending",
    )

    # Predictors: the key X columns followed by the vessel-type dummies.
    type_dummies = pd.get_dummies(df_ves["group"], prefix="d")
    design = pd.concat([features, type_dummies], axis=1)
    imputed = imputer.fit_transform(design)

    # Keep the originals under "<col>_orig"; rename() returns a new frame,
    # so the caller's df_ves is left untouched.
    result = df_ves.rename(columns={col: col + "_orig" for col in cols})

    # The first len(cols) columns of the imputed array correspond to `cols`.
    # Values are attached positionally (plain arrays) so that a non-default
    # index on df_ves cannot misalign rows.
    for pos, col in enumerate(cols):
        result[col] = imputed[:, pos]
    for col in cols:
        result[col + "_interp"] = is_missing[col].astype(int).to_numpy()

    return result

#Xcols = ['beam','loa','draft','built','power_kw']
#X = df_ves.loc[:, Xcols]
#y = df_ves.speed_kts
#
#reg = LassoCV(cv=5, random_state=0).fit(X, y)
#df_ves.speed_hat = reg.predict(X)

#from sklearn.datasets import make_regression
#X, y = make_regression(noise=4, random_state=0)
